import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import calendar
amazon_df= pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/AMZN?period1=1656862000&period2=1688398000&interval=1d&events=history&includeAdjustedClose=true')
amazon_df.head()
| Date | Open | High | Low | Close | Adj Close | Volume | |
|---|---|---|---|---|---|---|---|
| 0 | 2022-07-05 | 107.599998 | 114.080002 | 106.320000 | 113.500000 | 113.500000 | 76583700 |
| 1 | 2022-07-06 | 113.209999 | 115.480003 | 112.010002 | 114.330002 | 114.330002 | 66958900 |
| 2 | 2022-07-07 | 113.849998 | 116.989998 | 113.489998 | 116.330002 | 116.330002 | 57872300 |
| 3 | 2022-07-08 | 114.599998 | 116.580002 | 113.690002 | 115.540001 | 115.540001 | 45719700 |
| 4 | 2022-07-11 | 114.080002 | 114.300003 | 110.870003 | 111.750000 | 111.750000 | 53487600 |
amazon_df.isna().any()
Date False Open False High False Low False Close False Adj Close False Volume False dtype: bool
amazon_df.isna().sum() #no null values
Date 0 Open 0 High 0 Low 0 Close 0 Adj Close 0 Volume 0 dtype: int64
amazon_df.dtypes
Date object Open float64 High float64 Low float64 Close float64 Adj Close float64 Volume int64 dtype: object
amazon_df['new_Date']=pd.to_datetime(amazon_df['Date'],format='%Y-%m-%d')
amazon_df['new_Date']
0 2022-07-05
1 2022-07-06
2 2022-07-07
3 2022-07-08
4 2022-07-11
...
246 2023-06-27
247 2023-06-28
248 2023-06-29
249 2023-06-30
250 2023-07-03
Name: new_Date, Length: 251, dtype: datetime64[ns]
year=[]
month=[]
day=[]
dayofweek=[]
weekday=[]
for i in amazon_df['new_Date']:
year.append(i.year)
month.append(i.month)
day.append(i.day)
dayofweek.append(i.dayofweek)
weekday.append(calendar.day_name[dayofweek[-1]])
amazon_df['YEAR']=year
amazon_df['MONTH'] = month
amazon_df['DAY']=day
amazon_df['DAY OF WEEK'] = dayofweek
amazon_df['WEEKDAY'] = weekday
amazon_df.head()
| Date | Open | High | Low | Close | Adj Close | Volume | new_Date | YEAR | MONTH | DAY | DAY OF WEEK | WEEKDAY | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-07-05 | 107.599998 | 114.080002 | 106.320000 | 113.500000 | 113.500000 | 76583700 | 2022-07-05 | 2022 | 7 | 5 | 1 | Tuesday |
| 1 | 2022-07-06 | 113.209999 | 115.480003 | 112.010002 | 114.330002 | 114.330002 | 66958900 | 2022-07-06 | 2022 | 7 | 6 | 2 | Wednesday |
| 2 | 2022-07-07 | 113.849998 | 116.989998 | 113.489998 | 116.330002 | 116.330002 | 57872300 | 2022-07-07 | 2022 | 7 | 7 | 3 | Thursday |
| 3 | 2022-07-08 | 114.599998 | 116.580002 | 113.690002 | 115.540001 | 115.540001 | 45719700 | 2022-07-08 | 2022 | 7 | 8 | 4 | Friday |
| 4 | 2022-07-11 | 114.080002 | 114.300003 | 110.870003 | 111.750000 | 111.750000 | 53487600 | 2022-07-11 | 2022 | 7 | 11 | 0 | Monday |
plt.figure(figsize=(10,5))
plt.plot(amazon_df['new_Date'],amazon_df['Close'])
plt.title('Amazon Stock Price Over Time')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.show()
plt.figure(figsize=(10,5))
plt.plot(amazon_df['new_Date'],amazon_df['Volume'])
plt.title('Amazon Stock Volume traded Over Time')
plt.xlabel('Date')
plt.ylabel('Volume')
plt.show()
amazon_df['MA50'] = amazon_df['Close'].rolling(window=50).mean()
amazon_df['MA200'] = amazon_df['Close'].rolling(window=200).mean()
plt.plot(amazon_df['Date'], amazon_df['Close'], label='Amazon')
plt.plot(amazon_df['Date'], amazon_df['MA50'], label='MA50')
plt.plot(amazon_df['Date'], amazon_df['MA200'], label='MA200')
plt.title('Amazon Stock Price with Moving Averages')
plt.xlabel('Date')
plt.ylabel('Stock Price')
plt.legend()
plt.show()
amazon_df['Daily Return'] = amazon_df['Close'].pct_change()
print('Daily return average of Amazon stock: ', round(amazon_df['Daily Return'].mean()*100, 2), '%')
Daily return average of Amazon stock: 0.09 %
amazon_df['Daily Return']
0 NaN
1 0.007313
2 0.017493
3 -0.006791
4 -0.032803
...
246 0.014529
247 -0.001084
248 -0.008834
249 0.019234
250 -0.001074
Name: Daily Return, Length: 251, dtype: float64
amazon_df['Trend'] = ['Up' if x >= 0 else 'Down' for x in amazon_df['Daily Return']]
amazon_df['Trend']
0 Down
1 Up
2 Up
3 Down
4 Down
...
246 Up
247 Down
248 Down
249 Up
250 Down
Name: Trend, Length: 251, dtype: object
trend=amazon_df['Trend'].value_counts()
trend
Up 126 Down 125 Name: Trend, dtype: int64
import plotly.express as px
from IPython.display import HTML
fig=px.pie(amazon_df,
values=trend.values,
names=trend.index,
title='Amazon Stock Trend Frequency')
HTML(fig.to_html())
aapl_df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/AAPL?period1=1656862826&period2=1688398826&interval=1d&events=history&includeAdjustedClose=true')
goog_df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/GOOG?period1=1656862847&period2=1688398847&interval=1d&events=history&includeAdjustedClose=true')
msft_df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/MSFT?period1=1656862865&period2=1688398865&interval=1d&events=history&includeAdjustedClose=true')
amzn_df = pd.read_csv('https://query1.finance.yahoo.com/v7/finance/download/AMZN?period1=1656862000&period2=1688398000&interval=1d&events=history&includeAdjustedClose=true')
aapl_df.isna().sum()
Date 0 Open 0 High 0 Low 0 Close 0 Adj Close 0 Volume 0 dtype: int64
goog_df.isna().sum()
Date 0 Open 0 High 0 Low 0 Close 0 Adj Close 0 Volume 0 dtype: int64
msft_df.isna().sum()
Date 0 Open 0 High 0 Low 0 Close 0 Adj Close 0 Volume 0 dtype: int64
amzn_df.isna().sum()
Date 0 Open 0 High 0 Low 0 Close 0 Adj Close 0 Volume 0 dtype: int64
amzn_df['Daily Return'] = amzn_df['Close'].pct_change()
goog_df['Daily Return'] = goog_df['Close'].pct_change()
msft_df['Daily Return'] = msft_df['Close'].pct_change()
aapl_df['Daily Return'] = aapl_df['Close'].pct_change()
returns_df = pd.concat([amzn_df['Daily Return'], goog_df['Daily Return'], msft_df['Daily Return'], aapl_df['Daily Return']], axis=1)
returns_df.columns = ['Amazon', 'Google', 'Microsoft', 'Apple']
returns_df
| Amazon | Microsoft | Apple | ||
|---|---|---|---|---|
| 0 | NaN | NaN | NaN | NaN |
| 1 | 0.007313 | 0.011648 | 0.012783 | 0.009607 |
| 2 | 0.017493 | 0.035521 | 0.008227 | 0.023999 |
| 3 | -0.006791 | 0.007229 | -0.002757 | 0.004715 |
| 4 | -0.032803 | -0.030341 | -0.011769 | -0.014758 |
| ... | ... | ... | ... | ... |
| 246 | 0.014529 | -0.000672 | 0.018168 | 0.015059 |
| 247 | -0.001084 | 0.017393 | 0.003826 | 0.006328 |
| 248 | -0.008834 | -0.008837 | -0.002382 | 0.001797 |
| 249 | 0.019234 | 0.007999 | 0.016386 | 0.023103 |
| 250 | -0.001074 | -0.003389 | -0.007488 | -0.007785 |
251 rows × 4 columns
# calculate the correlation between daily returns of different stocks
corr_matrix = returns_df.corr()
# print the correlation matrix
print(corr_matrix)
Amazon Google Microsoft Apple Amazon 1.000000 0.705563 0.715576 0.640828 Google 0.705563 1.000000 0.790127 0.705345 Microsoft 0.715576 0.790127 1.000000 0.727176 Apple 0.640828 0.705345 0.727176 1.000000